source('../settings/settings.R')
source('commonFunctions.R')
# Resolve the CSV path of each drive. Drives 1-3 use the DISTANCE_PREV /
# DISTANCE_NEXT settings; drive 4 is fixed at 30 m before and after.
inputFileDrive1 <- str_interp(
  "../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv",
  list(drive = 1, distPrev = DISTANCE_PREV, distNext = DISTANCE_NEXT)
)
inputFileDrive2 <- str_interp(
  "../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv",
  list(drive = 2, distPrev = DISTANCE_PREV, distNext = DISTANCE_NEXT)
)
inputFileDrive3 <- str_interp(
  "../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv",
  list(drive = 3, distPrev = DISTANCE_PREV, distNext = DISTANCE_NEXT)
)
inputFileDrive4 <- str_interp(
  "../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv",
  list(drive = 4, distPrev = 30, distNext = 30)
)

# Load the four tables; drive 4 is read with stringsAsFactors set explicitly.
drive1 <- read.csv(file = inputFileDrive1)
drive2 <- read.csv(file = inputFileDrive2)
drive3 <- read.csv(file = inputFileDrive3)

drive4 <- read.csv(file = inputFileDrive4, stringsAsFactors = TRUE)
set.seed(43)

# Append the drive 1-3 perspiration summaries to the drive 4 table, naming each
# column as it is bound. Rows are combined purely by position.
combinedDf <- cbind(
  drive4,
  PP_Dev_1_Turning  = drive1$MeanPP_Seg0,
  PP_Dev_2          = drive2$MeanPP,
  PP_Dev_3          = drive3$MeanPP,
  Std_PP_2          = drive2$StdPP,
  Std_PP_3          = drive3$StdPP,
  PP_Dev_2_Straight = drive2$MeanPP_SegMax,
  PP_Dev_3_Straight = drive3$MeanPP_SegMax,
  PP_Dev_2_Turning  = drive2$MeanPP_Seg0,
  PP_Dev_3_Turning  = drive3$MeanPP_Seg0,
  Std_PP_2_Straight = drive2$StdPP_SegMax,
  Std_PP_3_Straight = drive3$StdPP_SegMax,
  Std_PP_2_Turning  = drive2$StdPP_Seg0,
  Std_PP_3_Turning  = drive3$StdPP_Seg0,
  Mean_PP_2_AccHigh = drive2$MeanPP_AccHigh,
  Mean_PP_3_AccHigh = drive3$MeanPP_AccHigh,
  Mean_PP_2_AccLow  = drive2$X.MeanPP_AccLow,
  Mean_PP_3_AccLow  = drive3$X.MeanPP_AccLow,
  Std_PP_2_AccHigh  = drive2$StdPP_AccHigh,
  Std_PP_3_AccHigh  = drive3$StdPP_AccHigh,
  Std_PP_2_AccLow   = drive2$StdPP_AccLow,
  Std_PP_3_AccLow   = drive3$StdPP_AccLow
)

# Subject labels become "#" followed by the id left-padded to two characters.
combinedDf$Subject <- paste0(
  "#",
  str_pad(combinedDf$Subject, 2, pad = "0")
)

# Activity recoded as a factor: "NO" -> "1", "C" -> "2", anything else -> "3".
combinedDf$ActivityEncoded <- factor(
  ifelse(
    combinedDf$Activity == "NO", "1",
    ifelse(combinedDf$Activity == "C", "2", "3")
  )
)

# combinedDf$PP_Dev_2_Turning <- ifelse(combinedDf$PP_Dev_2_Turning > 0, combinedDf$PP_Dev_2_Turning, combinedDf$PP_Dev_2_Straight)
# One table per Activity value, each with Subject converted to a factor so it
# carries only the subjects present in that subset.
combinedDf_NoStressor <- combinedDf[combinedDf$Activity == "NO", ]
combinedDf_NoStressor$Subject <- as.factor(combinedDf_NoStressor$Subject)

combinedDf_Cognitive <- combinedDf[combinedDf$Activity == "C", ]
combinedDf_Cognitive$Subject <- as.factor(combinedDf_Cognitive$Subject)

combinedDf_Motoric <- combinedDf[combinedDf$Activity == "M", ]
combinedDf_Motoric$Subject <- as.factor(combinedDf_Motoric$Subject)
# Marker colours used by the bar charts.
COLOR_NORMAL        <- list(color = 'rgb(120,120,120)')
COLOR_COGNITIVE     <- list(color = 'rgb(158,202,225)')
COLOR_MOTORIC       <- list(color = 'rgb(58,200,225)')
COLOR_FAILURE_PRIOR <- list(color = 'green')
COLOR_FAILURE       <- list(color = 'red')
COLOR_ACC           <- list(color = "orange")

# Shared y-axis definition for the perspiration charts.
yAxis <- list(
  title = 'Perinasal Perspiration (Log)',
  range = c(-0.3, 0.5)
)

# Apply Otsu algorithm to select threshold.
# The PP_After values are laid out as a single-row matrix and thresholded over
# their own observed range.
ppDev <- combinedDf$PP_After # PP_Dev
ppDevArray <- matrix(ppDev, nrow = 1, ncol = length(ppDev))

THRESHOLD_MILD <- otsu(ppDevArray, range = range(ppDev)) # Expected Threshold > 0.042
print(paste0('Threshold: ', THRESHOLD_MILD))
[1] "Threshold: 0.101235546875"
MARKER_LINE_MILD <- list(color = "blue")
MARKER_LINE_EXTREME <- list(color = "red")

# Grouped bar chart for the "NO" (no stressor) subjects: one bar group per
# subject with the drive 2 / drive 3 summaries and the PP_Prior / PP_After
# values, plus a dotted line at THRESHOLD_MILD.
# Legend labels previously read "Coginitive"; they are corrected to "Cognitive".
fig_NoStressor <- plot_ly(
  combinedDf_NoStressor,
  x = ~Subject, y = ~PP_Dev_2_Straight, type = 'bar',
  name = 'Cognitive - Mean PP (Straight)', marker = COLOR_COGNITIVE
) %>%
  # add_trace(y = ~PP_Dev_1_Turning, name = 'Normal - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>%
  add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker = COLOR_COGNITIVE) %>%
  add_trace(y = ~Mean_PP_2_AccHigh, name = 'Cognitive - Mean PP (High Accel.)', marker = COLOR_ACC) %>%
  add_trace(y = ~Mean_PP_2_AccLow, name = 'Cognitive - Mean PP (Low Accel.)', marker = COLOR_ACC) %>%

  add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker = COLOR_MOTORIC) %>%
  add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker = COLOR_MOTORIC) %>%
  add_trace(y = ~Mean_PP_3_AccHigh, name = 'Motoric - Mean PP (High Accel.)', marker = COLOR_ACC) %>%
  add_trace(y = ~Mean_PP_3_AccLow, name = 'Motoric - Mean PP (Low Accel.)', marker = COLOR_ACC) %>%

  add_trace(y = ~PP_Prior, name = 'Failure - Prior PP', marker = COLOR_FAILURE_PRIOR) %>%
  add_trace(y = ~PP_After, name = 'Failure - PP Deviation', marker = COLOR_FAILURE) %>%
  # The threshold line runs between two hard-coded subject labels.
  add_segments(x = "#01", xend = "#41", y = THRESHOLD_MILD, yend = THRESHOLD_MILD,
               name = "Threshold: Mild Change of PP",
               line = list(color = "blue", dash = 'dot')) %>%
  # add_segments(x="#01", xend="#41", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, barmode = 'group', title = "No Stressor")

htmltools::tagList(fig_NoStressor)
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
# Grouped bar chart for the "C" (cognitive stressor) subjects: one bar group
# per subject with the drive 2 / drive 3 summaries and the PP_Prior / PP_After
# values, plus a dotted line at THRESHOLD_MILD.
# Legend labels previously read "Coginitive"; they are corrected to "Cognitive".
fig_Cognitive <- plot_ly(
  combinedDf_Cognitive,
  x = ~Subject, y = ~PP_Dev_2_Straight, type = 'bar',
  name = 'Cognitive - Mean PP (Straight)', marker = COLOR_COGNITIVE
) %>%
  add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker = COLOR_COGNITIVE) %>%
  add_trace(y = ~Mean_PP_2_AccHigh, name = 'Cognitive - Mean PP (High Accel.)', marker = COLOR_ACC) %>%
  add_trace(y = ~Mean_PP_2_AccLow, name = 'Cognitive - Mean PP (Low Accel.)', marker = COLOR_ACC) %>%

  add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker = COLOR_MOTORIC) %>%
  add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker = COLOR_MOTORIC) %>%
  add_trace(y = ~Mean_PP_3_AccHigh, name = 'Motoric - Mean PP (High Accel.)', marker = COLOR_ACC) %>%
  add_trace(y = ~Mean_PP_3_AccLow, name = 'Motoric - Mean PP (Low Accel.)', marker = COLOR_ACC) %>%

  add_trace(y = ~PP_Prior, name = 'Failure - Prior PP', marker = COLOR_FAILURE_PRIOR) %>%
  add_trace(y = ~PP_After, name = 'Failure - PP Deviation', marker = COLOR_FAILURE) %>%
  # The threshold line runs between two hard-coded subject labels.
  add_segments(x = "#02", xend = "#22", y = THRESHOLD_MILD, yend = THRESHOLD_MILD,
               name = "Threshold: Mild Change of PP",
               line = list(color = "blue", dash = 'dot')) %>%
  # add_segments(x="#02", xend="#22", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, barmode = 'group', title = "Stressor = Cognitive")

htmltools::tagList(fig_Cognitive)
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
# Grouped bar chart for the "M" (motoric stressor) subjects, 870 px wide: one
# bar group per subject with the drive 2 / drive 3 summaries and the
# PP_Prior / PP_After values, plus a dotted line at THRESHOLD_MILD.
# Legend labels previously read "Coginitive"; they are corrected to "Cognitive".
fig_Motoric <- plot_ly(
  combinedDf_Motoric,
  x = ~Subject, y = ~PP_Dev_2_Straight, type = 'bar',
  name = 'Arousal in Drive C - Straight segment', marker = COLOR_COGNITIVE,
  width = 870
) %>%
  add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker = COLOR_COGNITIVE) %>%
  add_trace(y = ~Mean_PP_2_AccHigh, name = 'Cognitive - Mean PP (High Accel.)', marker = COLOR_ACC) %>%
  add_trace(y = ~Mean_PP_2_AccLow, name = 'Cognitive - Mean PP (Low Accel.)', marker = COLOR_ACC) %>%

  add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker = COLOR_MOTORIC) %>%
  add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker = COLOR_MOTORIC) %>%
  add_trace(y = ~Mean_PP_3_AccHigh, name = 'Motoric - Mean PP (High Accel.)', marker = COLOR_ACC) %>%
  add_trace(y = ~Mean_PP_3_AccLow, name = 'Motoric - Mean PP (Low Accel.)', marker = COLOR_ACC) %>%

  add_trace(y = ~PP_Prior, name = 'Arousal in Drive F - Under prior stressor', marker = COLOR_FAILURE_PRIOR) %>%
  add_trace(y = ~PP_After, name = 'Arousal in Drive F - Unintended acceleration', marker = COLOR_FAILURE) %>%
  # The threshold line runs between two hard-coded subject labels.
  add_segments(x = "#05", xend = "#31", y = THRESHOLD_MILD, yend = THRESHOLD_MILD,
               name = "Threshold",
               line = list(color = "blue", dash = 'dot')) %>%
  # add_segments(x="#05", xend="#31", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, barmode = 'group', title = "Stressor = Motoric")

htmltools::tagList(fig_Motoric)
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
library(nlme)

# Subject and Activity become factors on the full table.
combinedDf$Subject <- as.factor(combinedDf$Subject)
combinedDf$Activity <- as.factor(combinedDf$Activity)

# 1 when PP_Dev is above the Otsu threshold, 0 otherwise.
# NOTE(review): THRESHOLD_MILD was derived from PP_After, yet it is compared
# against PP_Dev here -- confirm this is intended.
combinedDf$PP_Dev_Group <- ifelse(combinedDf$PP_Dev > THRESHOLD_MILD, 1, 0)

Extract data for important features

# Keep only the columns used for the reduced feature view.
importantFeaturesDf <- select(
  combinedDf,
  Subject, Std_PP_3, PP_Dev_2_Turning, Activity, PP_Dev, PP_Dev_Group
)

Linear model with all variables

# Ordinary least-squares fit of PP_After on the straight/turning segment
# summaries (means and standard deviations for drives 2 and 3), PP_Prior and
# the encoded activity. The leading "+" in the formula is a unary plus and
# does not add a term.
linearModel1 <- lm(PP_After ~ 
              + PP_Dev_2_Straight
              + PP_Dev_3_Straight
              + PP_Dev_2_Turning
              + PP_Dev_3_Turning
              + Std_PP_2_Straight
              + Std_PP_3_Straight
              + Std_PP_2_Turning
              + Std_PP_3_Turning
              + PP_Prior
              + factor(ActivityEncoded), 
            data=combinedDf)

# anova(model)
# Print coefficients, residual summary and fit statistics.
summary(linearModel1)

Call:
lm(formula = PP_After ~ +PP_Dev_2_Straight + PP_Dev_3_Straight + 
    PP_Dev_2_Turning + PP_Dev_3_Turning + Std_PP_2_Straight + 
    Std_PP_3_Straight + Std_PP_2_Turning + Std_PP_3_Turning + 
    PP_Prior + factor(ActivityEncoded), data = combinedDf)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.076664 -0.027277 -0.000867  0.021999  0.100001 

Coefficients:
                         Estimate Std. Error t value Pr(>|t|)  
(Intercept)              -0.07132    0.08729  -0.817   0.4350  
PP_Dev_2_Straight         0.71569    0.36574   1.957   0.0821 .
PP_Dev_3_Straight        -0.81109    0.44442  -1.825   0.1013  
PP_Dev_2_Turning         -0.55309    0.44548  -1.242   0.2458  
PP_Dev_3_Turning          0.66117    0.46801   1.413   0.1914  
Std_PP_2_Straight         1.41894    1.36691   1.038   0.3263  
Std_PP_3_Straight         1.27101    0.71461   1.779   0.1090  
Std_PP_2_Turning         -1.53955    1.68233  -0.915   0.3840  
Std_PP_3_Turning          0.16636    1.14505   0.145   0.8877  
PP_Prior                  0.70952    0.25103   2.826   0.0198 *
factor(ActivityEncoded)2  0.04926    0.07400   0.666   0.5224  
factor(ActivityEncoded)3  0.12239    0.05641   2.170   0.0582 .
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.0682 on 9 degrees of freedom
Multiple R-squared:  0.9037,    Adjusted R-squared:  0.7859 
F-statistic: 7.675 on 11 and 9 DF,  p-value: 0.002452
# Diagnostic plots for the segment-based model fitted above.
plot(linearModel1)

# Refit under the same name using the high/low acceleration summaries (means
# and standard deviations for drives 2 and 3) plus the encoded activity.
# PP_Prior is deliberately left out of this variant (commented term below).
# This overwrites the previous linearModel1 object.
linearModel1 <- lm(PP_After ~ 
                Mean_PP_2_AccHigh
              + Mean_PP_2_AccLow
              + Mean_PP_3_AccHigh
              + Mean_PP_3_AccLow
              + Std_PP_2_AccHigh
              + Std_PP_2_AccLow
              + Std_PP_3_AccHigh
              + Std_PP_3_AccLow
              # + PP_Prior
              + factor(ActivityEncoded), 
            data=combinedDf)

# anova(model)
# Print coefficients, residual summary and fit statistics.
summary(linearModel1)

Call:
lm(formula = PP_After ~ Mean_PP_2_AccHigh + Mean_PP_2_AccLow + 
    Mean_PP_3_AccHigh + Mean_PP_3_AccLow + Std_PP_2_AccHigh + 
    Std_PP_2_AccLow + Std_PP_3_AccHigh + Std_PP_3_AccLow + factor(ActivityEncoded), 
    data = combinedDf)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.110015 -0.048043  0.009167  0.036551  0.082263 

Coefficients:
                         Estimate Std. Error t value Pr(>|t|)   
(Intercept)              -0.36297    0.09880  -3.674  0.00429 **
Mean_PP_2_AccHigh         2.14482    0.63161   3.396  0.00682 **
Mean_PP_2_AccLow         -1.74610    0.63007  -2.771  0.01974 * 
Mean_PP_3_AccHigh         2.99003    0.78293   3.819  0.00338 **
Mean_PP_3_AccLow         -2.40638    0.75184  -3.201  0.00948 **
Std_PP_2_AccHigh          5.27168    4.04124   1.304  0.22130   
Std_PP_2_AccLow          -4.34469    2.79301  -1.556  0.15087   
Std_PP_3_AccHigh          0.67278    1.84262   0.365  0.72262   
Std_PP_3_AccLow           3.14657    2.14416   1.468  0.17297   
factor(ActivityEncoded)2  0.18930    0.05019   3.771  0.00365 **
factor(ActivityEncoded)3  0.13988    0.05308   2.635  0.02494 * 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.07638 on 10 degrees of freedom
Multiple R-squared:  0.8657,    Adjusted R-squared:  0.7315 
F-statistic: 6.449 on 10 and 10 DF,  p-value: 0.003428
# Diagnostic plots for the acceleration-based model (the current linearModel1).
plot(linearModel1)

With Prior

# Same acceleration-based predictors as the previous model, with PP_Prior
# added as an extra term.
linearModelWPrior <- lm(PP_After ~ 
                Mean_PP_2_AccHigh
              + Mean_PP_2_AccLow
              + Mean_PP_3_AccHigh
              + Mean_PP_3_AccLow
              + Std_PP_2_AccHigh
              + Std_PP_2_AccLow
              + Std_PP_3_AccHigh
              + Std_PP_3_AccLow
              + PP_Prior
              + factor(ActivityEncoded), 
            data=combinedDf)

# anova(model)
# Print coefficients, residual summary and fit statistics.
summary(linearModelWPrior)

Call:
lm(formula = PP_After ~ Mean_PP_2_AccHigh + Mean_PP_2_AccLow + 
    Mean_PP_3_AccHigh + Mean_PP_3_AccLow + Std_PP_2_AccHigh + 
    Std_PP_2_AccLow + Std_PP_3_AccHigh + Std_PP_3_AccLow + PP_Prior + 
    factor(ActivityEncoded), data = combinedDf)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.067676 -0.021534 -0.007049  0.015917  0.088012 

Coefficients:
                         Estimate Std. Error t value Pr(>|t|)   
(Intercept)              -0.26129    0.07583  -3.446  0.00733 **
Mean_PP_2_AccHigh         1.63612    0.46949   3.485  0.00689 **
Mean_PP_2_AccLow         -1.35111    0.45858  -2.946  0.01632 * 
Mean_PP_3_AccHigh         1.63055    0.68413   2.383  0.04100 * 
Mean_PP_3_AccLow         -1.46332    0.59910  -2.443  0.03721 * 
Std_PP_2_AccHigh          6.65712    2.87230   2.318  0.04566 * 
Std_PP_2_AccLow          -4.05512    1.96633  -2.062  0.06922 . 
Std_PP_3_AccHigh         -0.99942    1.38885  -0.720  0.49003   
Std_PP_3_AccLow           3.57312    1.51344   2.361  0.04254 * 
PP_Prior                  0.63520    0.18967   3.349  0.00854 **
factor(ActivityEncoded)2  0.12025    0.04088   2.942  0.01645 * 
factor(ActivityEncoded)3  0.15861    0.03775   4.202  0.00230 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.05372 on 9 degrees of freedom
Multiple R-squared:  0.9402,    Adjusted R-squared:  0.8672 
F-statistic: 12.87 on 11 and 9 DF,  p-value: 0.00033
# Diagnostic plots for the model that includes PP_Prior.
plot(linearModelWPrior)

NaNs produced
NaNs produced

# Reduced model: only the mean high/low acceleration summaries for drives 2
# and 3 plus the encoded activity (no standard-deviation terms, no PP_Prior).
linearModel3 <- lm(PP_After ~ 
                Mean_PP_2_AccHigh
              + Mean_PP_2_AccLow
              + Mean_PP_3_AccHigh
              + Mean_PP_3_AccLow
              # + PP_Prior
              + factor(ActivityEncoded), 
            data=combinedDf)

# anova(model)
# Print coefficients, residual summary and fit statistics.
summary(linearModel3)

Call:
lm(formula = PP_After ~ Mean_PP_2_AccHigh + Mean_PP_2_AccLow + 
    Mean_PP_3_AccHigh + Mean_PP_3_AccLow + factor(ActivityEncoded), 
    data = combinedDf)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.135285 -0.049920 -0.003805  0.045609  0.123392 

Coefficients:
                         Estimate Std. Error t value Pr(>|t|)   
(Intercept)              -0.21802    0.06604  -3.301  0.00525 **
Mean_PP_2_AccHigh         2.11765    0.65809   3.218  0.00620 **
Mean_PP_2_AccLow         -1.58805    0.65839  -2.412  0.03016 * 
Mean_PP_3_AccHigh         2.97984    0.73529   4.053  0.00119 **
Mean_PP_3_AccLow         -2.53203    0.69956  -3.619  0.00279 **
factor(ActivityEncoded)2  0.17285    0.05284   3.271  0.00557 **
factor(ActivityEncoded)3  0.16446    0.04579   3.592  0.00295 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.08212 on 14 degrees of freedom
Multiple R-squared:  0.7828,    Adjusted R-squared:  0.6896 
F-statistic: 8.407 on 6 and 14 DF,  p-value: 0.0005398
# Diagnostic plots for the reduced model.
plot(linearModel3)

# Export the coefficient and anova tables as LaTeX.
# Both tables are taken from linearModel1 (the acceleration-based fit), not
# from linearModel3 plotted just above.
library(xtable)
lmCoeffs <- summary(linearModel1)$coefficients
lmAnova <- anova(linearModel1)

# digits has one entry for the row names plus one per column of the table.
print(xtable(lmCoeffs, digits=c(0,5,5,5,5)))
% latex table generated in R 3.6.1 by xtable 1.8-4 package
% Sat Jul 11 18:00:55 2020
\begin{table}[ht]
\centering
\begin{tabular}{rrrrr}
  \hline
 & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\ 
  \hline
(Intercept) & -0.36297 & 0.09880 & -3.67377 & 0.00429 \\ 
  Mean\_PP\_2\_AccHigh & 2.14482 & 0.63161 & 3.39577 & 0.00682 \\ 
  Mean\_PP\_2\_AccLow & -1.74610 & 0.63007 & -2.77127 & 0.01974 \\ 
  Mean\_PP\_3\_AccHigh & 2.99003 & 0.78293 & 3.81901 & 0.00338 \\ 
  Mean\_PP\_3\_AccLow & -2.40638 & 0.75184 & -3.20067 & 0.00948 \\ 
  Std\_PP\_2\_AccHigh & 5.27168 & 4.04124 & 1.30447 & 0.22130 \\ 
  Std\_PP\_2\_AccLow & -4.34469 & 2.79301 & -1.55556 & 0.15087 \\ 
  Std\_PP\_3\_AccHigh & 0.67278 & 1.84262 & 0.36512 & 0.72262 \\ 
  Std\_PP\_3\_AccLow & 3.14657 & 2.14416 & 1.46750 & 0.17297 \\ 
  factor(ActivityEncoded)2 & 0.18930 & 0.05019 & 3.77139 & 0.00365 \\ 
  factor(ActivityEncoded)3 & 0.13988 & 0.05308 & 2.63525 & 0.02494 \\ 
   \hline
\end{tabular}
\end{table}
# `digits` must be given to xtable(), not print(): print.xtable() ignores it,
# so the table was emitted with the default precision. The anova table has
# five columns (Df, Sum Sq, Mean Sq, F value, Pr(>F)), so the vector needs six
# entries: row names, Df as an integer, then five decimals for the rest.
print(xtable(lmAnova, digits = c(0, 0, 5, 5, 5, 5)))
% latex table generated in R 3.6.1 by xtable 1.8-4 package
% Sat Jul 11 18:00:55 2020
\begin{table}[ht]
\centering
\begin{tabular}{lrrrrr}
  \hline
 & Df & Sum Sq & Mean Sq & F value & Pr($>$F) \\ 
  \hline
Mean\_PP\_2\_AccHigh & 1 & 0.15 & 0.15 & 26.27 & 0.0004 \\ 
  Mean\_PP\_2\_AccLow & 1 & 0.00 & 0.00 & 0.32 & 0.5845 \\ 
  Mean\_PP\_3\_AccHigh & 1 & 0.01 & 0.01 & 1.09 & 0.3207 \\ 
  Mean\_PP\_3\_AccLow & 1 & 0.06 & 0.06 & 11.06 & 0.0077 \\ 
  Std\_PP\_2\_AccHigh & 1 & 0.00 & 0.00 & 0.06 & 0.8116 \\ 
  Std\_PP\_2\_AccLow & 1 & 0.00 & 0.00 & 0.50 & 0.4940 \\ 
  Std\_PP\_3\_AccHigh & 1 & 0.03 & 0.03 & 5.02 & 0.0490 \\ 
  Std\_PP\_3\_AccLow & 1 & 0.02 & 0.02 & 3.14 & 0.1070 \\ 
  factor(ActivityEncoded) & 2 & 0.10 & 0.05 & 8.51 & 0.0069 \\ 
  Residuals & 10 & 0.06 & 0.01 &  &  \\ 
   \hline
\end{tabular}
\end{table}
# Otsu threshold on PP_After, computed on a single-row matrix over the
# observed value range; used below to derive the binary class label.
ppAfter <- combinedDf$PP_After
ppAfterArray <- matrix(ppAfter, nrow = 1, ncol = length(ppAfter))

thresholdPPAfter <- otsu(ppAfterArray, range = range(ppAfter)) # Expected Threshold > 0.042
print(paste0('Threshold: ', thresholdPPAfter))
[1] "Threshold: 0.101235546875"
# Feature table for the classifier: PP_Prior plus the acceleration summaries.
selectedDf <- select(
  combinedDf,
  "Subject", "Activity", "PP_After", "PP_Prior",
  "Mean_PP_2_AccHigh", "Mean_PP_3_AccHigh",
  "Mean_PP_2_AccLow", "Mean_PP_3_AccLow",
  "Std_PP_2_AccHigh", "Std_PP_3_AccHigh",
  "Std_PP_2_AccLow", "Std_PP_3_AccLow"
)

# Subject is an identifier, not a feature.
selectedDf$Subject <- NULL

# One 0/1 indicator column per activity, then drop the original column.
selectedDf$Activity_NO <- ifelse(selectedDf$Activity == "NO", 1, 0)
selectedDf$Activity_C  <- ifelse(selectedDf$Activity == "C", 1, 0)
selectedDf$Activity_M  <- ifelse(selectedDf$Activity == "M", 1, 0)
selectedDf$Activity <- NULL

# selectedDf$PP_Dev_1_Turning <- NULL
# selectedDf$Std_PP_2_Straight <- NULL
# selectedDf$Std_PP_2_Turning <- NULL
# selectedDf$Std_PP_3_Straight <- NULL
# selectedDf$Std_PP_3_Turning <- NULL
#
# # According to Linear model
# selectedDf$PP_Dev_2_Straight <- abs(selectedDf$PP_Dev_2_Straight)
# selectedDf$PP_Dev_3_Straight <- abs(selectedDf$PP_Dev_3_Straight)
# selectedDf$PP_Dev_2_Turning <- abs(selectedDf$PP_Dev_2_Turning)
# selectedDf$PP_Dev_3_Turning <- abs(selectedDf$PP_Dev_3_Turning)
# selectedDf$PP_Prior <- abs(selectedDf$PP_Prior) # NULL

# Binary target: TRUE when PP_After reaches the Otsu threshold. The raw
# PP_After column is then removed so it cannot be used as a feature.
selectedDf$Class <- ifelse(selectedDf$PP_After >= thresholdPPAfter, TRUE, FALSE)
selectedDf$PP_After <- NULL

print(names(selectedDf))
 [1] "PP_Prior"          "Mean_PP_2_AccHigh" "Mean_PP_3_AccHigh" "Mean_PP_2_AccLow"  "Mean_PP_3_AccLow"  "Std_PP_2_AccHigh" 
 [7] "Std_PP_3_AccHigh"  "Std_PP_2_AccLow"   "Std_PP_3_AccLow"   "Activity_NO"       "Activity_C"        "Activity_M"       
[13] "Class"            
# library(mefa)
# combinedDf <- rep(combinedDf, 10)
set.seed(43)
n_folds <- 3

# Booster settings shared by the cross-validation and the final fit.
param <- list(
  objective        = "binary:logistic",
  booster          = "gbtree",
  eval_metric      = "auc",
  eta              = 0.1,
  max_depth        = 10,
  alpha            = 1,
  lambda           = 0,
  gamma            = 0.45,
  min_child_weight = 0.3,
  subsample        = 0.5,
  colsample_bytree = 1
)
params <- param

# XGBoost Model: stratified cross-validation on every column except Class,
# keeping the out-of-fold predictions.
# maximize = FALSE: per the original author's note, leaving this FALSE lets
# training run for more iterations.
# NOTE(review): early stopping is combined with both "auc" and "error"
# metrics -- confirm which metric it tracks and whether it should be maximized.
xgb_m <- xgb.cv(
  params                = param,
  data                  = as.matrix(select(selectedDf, -Class)),
  label                 = selectedDf$Class,
  nrounds               = 100,
  verbose               = FALSE,
  prediction            = TRUE,
  maximize              = FALSE,
  nfold                 = n_folds,
  metrics               = c("auc", "error"),
  early_stopping_rounds = 50,
  stratified            = TRUE,
  scale_pos_weight      = 1
)

# xgb_m$evaluation_log[xgb_m$best_iteration,"test_auc_mean"]
# Evaluation-log row of the selected iteration.
xgb_m$evaluation_log[xgb_m$best_iteration, ]
NA

Performance Metrics

# Prediction: round the predictions stored by xgb.cv (prediction = T above) to
# the nearest integer and keep them as the predicted class column.
selectedDf$clsPred <- round(xgb_m$pred)

# Compute binary classification metrics from a two-column table.
#
# sdat: data frame whose first column holds the actual class and whose second
#       column holds the predicted class. Both must be logical or 0/1 values;
#       rows containing NA are dropped before anything is counted.
#
# Returns an unnamed numeric vector in the fixed order
#   c(accuracy, specificity, sensitivity, precision, npv).
# A metric whose denominator is zero comes back as NaN.
#
# Fixes over the previous version:
#   * rows of the confusion matrix are the actual classes and columns the
#     predictions, so specificity/sensitivity are normalised by the actual
#     totals and precision/NPV by the predicted totals (they used to be
#     swapped);
#   * both factors carry the levels FALSE/TRUE explicitly, so the matrix is
#     always 2x2 even when one class never occurs.
computePerformanceResults <- function(sdat){
  sdat <- sdat[complete.cases(sdat), ]

  actualRaw <- as.logical(sdat[[1]])
  predictedRaw <- as.logical(sdat[[2]])
  if (anyNA(actualRaw) || anyNA(predictedRaw)) {
    stop("class labels must be logical or 0/1 values", call. = FALSE)
  }

  actual <- factor(actualRaw, levels = c(FALSE, TRUE))
  predicted <- factor(predictedRaw, levels = c(FALSE, TRUE))
  conf_mat <- table(actual, predicted)

  tn <- conf_mat[1, 1]  # actual negative, predicted negative
  fp <- conf_mat[1, 2]  # actual negative, predicted positive
  fn <- conf_mat[2, 1]  # actual positive, predicted negative
  tp <- conf_mat[2, 2]  # actual positive, predicted positive

  acc <- (tp + tn) / nrow(sdat)
  specif <- tn / (tn + fp)
  sensiv <- tp / (tp + fn)
  preci <- tp / (tp + fp)
  npv <- tn / (tn + fn)

  c(acc, specif, sensiv, preci, npv)
}

# Get average performance
# The helper returns c(accuracy, specificity, sensitivity, precision, npv);
# unpack it by position.
performance <- computePerformanceResults(select(selectedDf, Class, clsPred))
acc    <- performance[1]
spec   <- performance[2]
recall <- performance[3]
prec   <- performance[4]
npv    <- performance[5]

# F1 is the harmonic mean of precision and recall.
f1 <- (2 * recall * prec) / (recall + prec)

# Mean test AUC at the iteration selected by the cross-validation.
auc <- as.numeric(xgb_m$evaluation_log[xgb_m$best_iteration, "test_auc_mean"])

# Print each metric rounded to two decimals; the "[1]" lines are the recorded
# console output of the preceding call.
print(paste("Accuracy=", round(acc, 2)))
[1] "Accuracy= 0.71"
print(paste("Precision=", round(prec, 2)))
[1] "Precision= 0.33"
print(paste("Recall=", round(recall, 2)))
[1] "Recall= 1"
print(paste("Specificity=", round(spec, 2)))
[1] "Specificity= 0.67"
print(paste("NPV=", round(npv, 2)))
[1] "NPV= 1"
print(paste("F1=", round(f1, 2)))
[1] "F1= 0.5"
print(paste("AUC=", round(auc, 2)))
[1] "AUC= 0.65"
# Importance
# Fit one model on every row (all columns except Class and clsPred) with the
# same settings, then rank the features with xgb.importance().
# NOTE(review): prediction, nfold, metrics and stratified look like xgb.cv()
# options carried over from the call above -- confirm xgboost() uses them.
bst <- xgboost(
  params                = param,
  data                  = as.matrix(select(selectedDf, -c(Class, clsPred))),
  label                 = selectedDf$Class,
  nrounds               = 100,
  verbose               = FALSE,
  prediction            = TRUE,
  maximize              = FALSE,
  nfold                 = n_folds,
  metrics               = c("auc", "error"),
  early_stopping_rounds = 50,
  stratified            = TRUE,
  scale_pos_weight      = 1
)
importanceDf <- xgb.importance(
  colnames(select(selectedDf, -c(Class, clsPred))),
  model = bst
)
print(importanceDf)
library(pROC)

# ROC of the cross-validated predictions against the true class.
# The predictor is the raw score in xgb_m$pred: rounding it to 0/1 first
# leaves a single operating point, so the curve degenerates to two straight
# segments and no longer reflects how well the scores rank the cases.
# The curve is computed once and the same object is plotted.
dfROC <- pROC::roc(response = ifelse(selectedDf$Class == TRUE, 1, 0),
                   predictor = xgb_m$pred,
                   levels = c(0, 1), direction = "<")

# it = which.max(xgb_m$evaluation_log$test_auc_mean)
# best.iter = xgb_m$evaluation_log$iter[it]
# best.iter

plot(dfROC,
     legacy.axes = TRUE,
     main = "ROC Curve",
     lwd = 1.5)

Plot feature importance

# Axis definitions for the importance chart (this replaces the yAxis used by
# the perspiration charts earlier in the notebook).
yAxis <- list(
  title = 'Importance',
  range = c(0.0, 1.0)
)
xAxis <- list(
  title = ''
)

# Order the bars by decreasing Gain.
importanceDf$Feature <- factor(importanceDf$Feature, levels = importanceDf[order(-Gain),]$Feature)

# Grouped bars of Gain, Cover and Frequency per feature.
# config() receives the figure from the pipe, so only the named option is
# passed; the stray `.Last.value` that was forwarded as an extra positional
# argument has been removed.
fig_Importance <- plot_ly(importanceDf, x = ~Feature, y = ~Gain, type = 'bar', name = 'Gain', width = 600) %>%
  add_trace(y = ~Cover, name = 'Cover') %>%
  add_trace(y = ~Frequency, name = 'Frequency') %>%
  layout(yaxis = yAxis, xaxis = xAxis, barmode = 'group', title = "Feature Importance") %>%
  config(mathjax = 'cdn')

htmltools::tagList(fig_Importance)
---
title: "R Notebook"
output: html_notebook
---

```{r}
source('../settings/settings.R')
source('commonFunctions.R')
```

```{r}
# Resolve the CSV path of each drive. Drives 1-3 use the DISTANCE_PREV /
# DISTANCE_NEXT settings; drive 4 is fixed at 30 m before and after.
inputFileDrive1 <- str_interp(
  "../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv",
  list(drive = 1, distPrev = DISTANCE_PREV, distNext = DISTANCE_NEXT)
)
inputFileDrive2 <- str_interp(
  "../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv",
  list(drive = 2, distPrev = DISTANCE_PREV, distNext = DISTANCE_NEXT)
)
inputFileDrive3 <- str_interp(
  "../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv",
  list(drive = 3, distPrev = DISTANCE_PREV, distNext = DISTANCE_NEXT)
)
inputFileDrive4 <- str_interp(
  "../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv",
  list(drive = 4, distPrev = 30, distNext = 30)
)

# Load the four tables; drive 4 is read with stringsAsFactors set explicitly.
drive1 <- read.csv(file = inputFileDrive1)
drive2 <- read.csv(file = inputFileDrive2)
drive3 <- read.csv(file = inputFileDrive3)

drive4 <- read.csv(file = inputFileDrive4, stringsAsFactors = TRUE)
```

```{r}
set.seed(43)

# Append the drive 1-3 perspiration summaries to the drive 4 table, naming each
# column as it is bound. Rows are combined purely by position.
combinedDf <- cbind(
  drive4,
  PP_Dev_1_Turning  = drive1$MeanPP_Seg0,
  PP_Dev_2          = drive2$MeanPP,
  PP_Dev_3          = drive3$MeanPP,
  Std_PP_2          = drive2$StdPP,
  Std_PP_3          = drive3$StdPP,
  PP_Dev_2_Straight = drive2$MeanPP_SegMax,
  PP_Dev_3_Straight = drive3$MeanPP_SegMax,
  PP_Dev_2_Turning  = drive2$MeanPP_Seg0,
  PP_Dev_3_Turning  = drive3$MeanPP_Seg0,
  Std_PP_2_Straight = drive2$StdPP_SegMax,
  Std_PP_3_Straight = drive3$StdPP_SegMax,
  Std_PP_2_Turning  = drive2$StdPP_Seg0,
  Std_PP_3_Turning  = drive3$StdPP_Seg0,
  Mean_PP_2_AccHigh = drive2$MeanPP_AccHigh,
  Mean_PP_3_AccHigh = drive3$MeanPP_AccHigh,
  Mean_PP_2_AccLow  = drive2$X.MeanPP_AccLow,
  Mean_PP_3_AccLow  = drive3$X.MeanPP_AccLow,
  Std_PP_2_AccHigh  = drive2$StdPP_AccHigh,
  Std_PP_3_AccHigh  = drive3$StdPP_AccHigh,
  Std_PP_2_AccLow   = drive2$StdPP_AccLow,
  Std_PP_3_AccLow   = drive3$StdPP_AccLow
)

# Subject labels become "#" followed by the id left-padded to two characters.
combinedDf$Subject <- paste0(
  "#",
  str_pad(combinedDf$Subject, 2, pad = "0")
)

# Activity recoded as a factor: "NO" -> "1", "C" -> "2", anything else -> "3".
combinedDf$ActivityEncoded <- factor(
  ifelse(
    combinedDf$Activity == "NO", "1",
    ifelse(combinedDf$Activity == "C", "2", "3")
  )
)

# combinedDf$PP_Dev_2_Turning <- ifelse(combinedDf$PP_Dev_2_Turning > 0, combinedDf$PP_Dev_2_Turning, combinedDf$PP_Dev_2_Straight)
```

```{r}
# One table per Activity value, each with Subject converted to a factor so it
# carries only the subjects present in that subset.
combinedDf_NoStressor <- combinedDf[combinedDf$Activity == "NO", ]
combinedDf_NoStressor$Subject <- as.factor(combinedDf_NoStressor$Subject)

combinedDf_Cognitive <- combinedDf[combinedDf$Activity == "C", ]
combinedDf_Cognitive$Subject <- as.factor(combinedDf_Cognitive$Subject)

combinedDf_Motoric <- combinedDf[combinedDf$Activity == "M", ]
combinedDf_Motoric$Subject <- as.factor(combinedDf_Motoric$Subject)
```

```{r}
# Marker colours used by the bar charts.
COLOR_NORMAL        <- list(color = 'rgb(120,120,120)')
COLOR_COGNITIVE     <- list(color = 'rgb(158,202,225)')
COLOR_MOTORIC       <- list(color = 'rgb(58,200,225)')
COLOR_FAILURE_PRIOR <- list(color = 'green')
COLOR_FAILURE       <- list(color = 'red')
COLOR_ACC           <- list(color = "orange")

# Shared y-axis definition for the perspiration charts.
yAxis <- list(
  title = 'Perinasal Perspiration (Log)',
  range = c(-0.3, 0.5)
)

# Apply Otsu algorithm to select threshold.
# The PP_After values are laid out as a single-row matrix and thresholded over
# their own observed range.
ppDev <- combinedDf$PP_After # PP_Dev
ppDevArray <- matrix(ppDev, nrow = 1, ncol = length(ppDev))

THRESHOLD_MILD <- otsu(ppDevArray, range = range(ppDev)) # Expected Threshold > 0.042
print(paste0('Threshold: ', THRESHOLD_MILD))

# Line colours for the threshold markers.
MARKER_LINE_MILD <- list(color = "blue")
MARKER_LINE_EXTREME <- list(color = "red")
```

```{r, warning=F}
# Grouped bar chart for the "NO" (no stressor) subjects: one bar group per
# subject with the drive 2 / drive 3 summaries and the PP_Prior / PP_After
# values, plus a dotted line at THRESHOLD_MILD.
# Legend labels previously read "Coginitive"; they are corrected to "Cognitive".
fig_NoStressor <- plot_ly(
  combinedDf_NoStressor,
  x = ~Subject, y = ~PP_Dev_2_Straight, type = 'bar',
  name = 'Cognitive - Mean PP (Straight)', marker = COLOR_COGNITIVE
) %>%
  # add_trace(y = ~PP_Dev_1_Turning, name = 'Normal - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>%
  add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker = COLOR_COGNITIVE) %>%
  add_trace(y = ~Mean_PP_2_AccHigh, name = 'Cognitive - Mean PP (High Accel.)', marker = COLOR_ACC) %>%
  add_trace(y = ~Mean_PP_2_AccLow, name = 'Cognitive - Mean PP (Low Accel.)', marker = COLOR_ACC) %>%

  add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker = COLOR_MOTORIC) %>%
  add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker = COLOR_MOTORIC) %>%
  add_trace(y = ~Mean_PP_3_AccHigh, name = 'Motoric - Mean PP (High Accel.)', marker = COLOR_ACC) %>%
  add_trace(y = ~Mean_PP_3_AccLow, name = 'Motoric - Mean PP (Low Accel.)', marker = COLOR_ACC) %>%

  add_trace(y = ~PP_Prior, name = 'Failure - Prior PP', marker = COLOR_FAILURE_PRIOR) %>%
  add_trace(y = ~PP_After, name = 'Failure - PP Deviation', marker = COLOR_FAILURE) %>%
  # The threshold line runs between two hard-coded subject labels.
  add_segments(x = "#01", xend = "#41", y = THRESHOLD_MILD, yend = THRESHOLD_MILD,
               name = "Threshold: Mild Change of PP",
               line = list(color = "blue", dash = 'dot')) %>%
  # add_segments(x="#01", xend="#41", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, barmode = 'group', title = "No Stressor")

htmltools::tagList(fig_NoStressor)
```

```{r, warning=F}
# Grouped bar chart for the "C" (cognitive stressor) subjects: one bar group
# per subject with the drive 2 / drive 3 summaries and the PP_Prior / PP_After
# values, plus a dotted line at THRESHOLD_MILD.
# Legend labels previously read "Coginitive"; they are corrected to "Cognitive".
fig_Cognitive <- plot_ly(
  combinedDf_Cognitive,
  x = ~Subject, y = ~PP_Dev_2_Straight, type = 'bar',
  name = 'Cognitive - Mean PP (Straight)', marker = COLOR_COGNITIVE
) %>%
  add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker = COLOR_COGNITIVE) %>%
  add_trace(y = ~Mean_PP_2_AccHigh, name = 'Cognitive - Mean PP (High Accel.)', marker = COLOR_ACC) %>%
  add_trace(y = ~Mean_PP_2_AccLow, name = 'Cognitive - Mean PP (Low Accel.)', marker = COLOR_ACC) %>%

  add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker = COLOR_MOTORIC) %>%
  add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker = COLOR_MOTORIC) %>%
  add_trace(y = ~Mean_PP_3_AccHigh, name = 'Motoric - Mean PP (High Accel.)', marker = COLOR_ACC) %>%
  add_trace(y = ~Mean_PP_3_AccLow, name = 'Motoric - Mean PP (Low Accel.)', marker = COLOR_ACC) %>%

  add_trace(y = ~PP_Prior, name = 'Failure - Prior PP', marker = COLOR_FAILURE_PRIOR) %>%
  add_trace(y = ~PP_After, name = 'Failure - PP Deviation', marker = COLOR_FAILURE) %>%
  # The threshold line runs between two hard-coded subject labels.
  add_segments(x = "#02", xend = "#22", y = THRESHOLD_MILD, yend = THRESHOLD_MILD,
               name = "Threshold: Mild Change of PP",
               line = list(color = "blue", dash = 'dot')) %>%
  # add_segments(x="#02", xend="#22", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, barmode = 'group', title = "Stressor = Cognitive")

htmltools::tagList(fig_Cognitive)
```



```{r, warning=F}
# Grouped bar chart (barmode = 'group') of the PP measures in
# combinedDf_Motoric, with Subject on the x axis and a fixed width of 870.
# Each add_trace() call contributes one bar per subject; the dotted blue
# segment spanning subjects "#05" to "#31" is drawn at THRESHOLD_MILD.
fig_Motoric <- plot_ly(combinedDf_Motoric, x = ~Subject, y = ~PP_Dev_2_Straight, type = 'bar',
                       name = 'Arousal in Drive C - Straight segment', marker = COLOR_COGNITIVE,
                       width = 870) %>%
  add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker = COLOR_COGNITIVE) %>%
  # Legend labels spell "Cognitive" consistently with the other traces.
  add_trace(y = ~Mean_PP_2_AccHigh, name = 'Cognitive - Mean PP (High Accel.)', marker = COLOR_ACC) %>%
  add_trace(y = ~Mean_PP_2_AccLow, name = 'Cognitive - Mean PP (Low Accel.)', marker = COLOR_ACC) %>%

  add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker = COLOR_MOTORIC) %>%
  add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker = COLOR_MOTORIC) %>%
  add_trace(y = ~Mean_PP_3_AccHigh, name = 'Motoric - Mean PP (High Accel.)', marker = COLOR_ACC) %>%
  add_trace(y = ~Mean_PP_3_AccLow, name = 'Motoric - Mean PP (Low Accel.)', marker = COLOR_ACC) %>%

  add_trace(y = ~PP_Prior, name = 'Arousal in Drive F - Under prior stressor', marker = COLOR_FAILURE_PRIOR) %>%
  add_trace(y = ~PP_After, name = 'Arousal in Drive F - Unintended acceleration', marker = COLOR_FAILURE) %>%
  add_segments(x = "#05", xend = "#31", y = THRESHOLD_MILD, yend = THRESHOLD_MILD,
               name = "Threshold",
               line = list(color = "blue", dash = 'dot')) %>%
  # Disabled: second threshold line at THRESHOLD_EXTREME.
  # add_segments(x="#05", xend="#31", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, barmode = 'group', title = "Stressor = Motoric")

htmltools::tagList(fig_Motoric)
```


```{r}
library(nlme)

# Treat the identifier columns as categorical variables.
combinedDf$Subject <- as.factor(combinedDf$Subject)
combinedDf$Activity <- as.factor(combinedDf$Activity)

# 1 when PP_Dev is strictly above THRESHOLD_MILD, 0 otherwise (NA stays NA).
combinedDf$PP_Dev_Group <- as.numeric(combinedDf$PP_Dev > THRESHOLD_MILD)
```

### Extract data for important features
```{r}
# Keep only the identifier, the selected predictors and the two outcome
# columns (raw PP_Dev and its thresholded group).
importantFeaturesDf <- select(
  combinedDf,
  Subject, Std_PP_3, PP_Dev_2_Turning, Activity, PP_Dev, PP_Dev_Group
)
```

# Linear model with all variables
```{r}
# Regress PP_After on the Straight/Turning mean and std PP columns (suffixes
# _2_ and _3_), PP_Prior, and the encoded activity treated as a factor.
linearModel1 <- lm(
  formula = PP_After ~ +PP_Dev_2_Straight + PP_Dev_3_Straight +
    PP_Dev_2_Turning + PP_Dev_3_Turning +
    Std_PP_2_Straight + Std_PP_3_Straight +
    Std_PP_2_Turning + Std_PP_3_Turning +
    PP_Prior + factor(ActivityEncoded),
  data = combinedDf
)

# Optional: anova(linearModel1)
# Coefficient table followed by the standard lm diagnostic plots.
summary(linearModel1)
plot(linearModel1)
```

```{r}
# Refit linearModel1 (overwriting the previous fit) on the AccHigh/AccLow
# mean and std PP columns plus the encoded activity; PP_Prior is left out.
linearModel1 <- lm(
  formula = PP_After ~ Mean_PP_2_AccHigh + Mean_PP_2_AccLow +
    Mean_PP_3_AccHigh + Mean_PP_3_AccLow +
    Std_PP_2_AccHigh + Std_PP_2_AccLow +
    Std_PP_3_AccHigh + Std_PP_3_AccLow +
    # PP_Prior deliberately excluded here
    factor(ActivityEncoded),
  data = combinedDf
)

# Optional: anova(linearModel1)
# Coefficient table followed by the standard lm diagnostic plots.
summary(linearModel1)
plot(linearModel1)
```

## With Prior
```{r}
# Same AccHigh/AccLow predictors as the model without the prior, with
# PP_Prior added ahead of the activity factor.
linearModelWPrior <- lm(
  formula = PP_After ~ Mean_PP_2_AccHigh + Mean_PP_2_AccLow +
    Mean_PP_3_AccHigh + Mean_PP_3_AccLow +
    Std_PP_2_AccHigh + Std_PP_2_AccLow +
    Std_PP_3_AccHigh + Std_PP_3_AccLow +
    PP_Prior + factor(ActivityEncoded),
  data = combinedDf
)

# Optional: anova(linearModelWPrior)
# Coefficient table followed by the standard lm diagnostic plots.
summary(linearModelWPrior)
plot(linearModelWPrior)
```

```{r}
# Reduced model: only the four AccHigh/AccLow mean PP columns and the
# encoded activity; no std columns and no PP_Prior.
linearModel3 <- lm(
  formula = PP_After ~ Mean_PP_2_AccHigh + Mean_PP_2_AccLow +
    Mean_PP_3_AccHigh + Mean_PP_3_AccLow +
    # PP_Prior deliberately excluded here
    factor(ActivityEncoded),
  data = combinedDf
)

# Optional: anova(linearModel3)
# Coefficient table followed by the standard lm diagnostic plots.
summary(linearModel3)
plot(linearModel3)
```


```{r}
# Export the coefficient table and the ANOVA table of linearModel1 through
# xtable, printing both with 5 decimals.
library(xtable)
lmCoeffs <- summary(linearModel1)$coefficients
lmAnova <- anova(linearModel1)

# xtable() expects `digits` to hold one entry for the row names plus one entry
# per table column.
print(xtable(lmCoeffs, digits = c(0, 5, 5, 5, 5)))

# `digits` has to be given to xtable(), not to print(), otherwise it is not
# applied. The first column of the ANOVA table (Df) is an integer count, the
# remaining columns get 5 decimals.
print(xtable(lmAnova, digits = c(0, 0, rep(5, ncol(lmAnova) - 1))))

```


```{r}
# Binarise PP_After: otsu() (presumably Otsu thresholding from an image
# package -- confirm) is given the values as a 1 x n matrix together with
# their observed min/max range, and returns the cut-off.
ppAfter <- combinedDf$PP_After
ppAfterArray <- matrix(ppAfter, nrow = 1, ncol = length(ppAfter))

thresholdPPAfter <- otsu(
  ppAfterArray,
  range = c(min(ppAfter), max(ppAfter))
) # Expected Threshold > 0.042
print(paste0('Threshold: ', thresholdPPAfter))

# Modelling table: prior PP plus the AccHigh/AccLow mean and std columns.
# Subject is dropped straight away; Activity and PP_After are kept only
# until the derived columns below have been built.
selectedDf <- combinedDf %>%
  select(
    Subject, Activity, PP_After, PP_Prior,
    Mean_PP_2_AccHigh, Mean_PP_3_AccHigh,
    Mean_PP_2_AccLow, Mean_PP_3_AccLow,
    Std_PP_2_AccHigh, Std_PP_3_AccHigh,
    Std_PP_2_AccLow, Std_PP_3_AccLow
  ) %>%
  select(-Subject)

# One 0/1 indicator column per activity level, appended in this order.
selectedDf$Activity_NO <- as.numeric(selectedDf$Activity == "NO")
selectedDf$Activity_C <- as.numeric(selectedDf$Activity == "C")
selectedDf$Activity_M <- as.numeric(selectedDf$Activity == "M")
selectedDf$Activity <- NULL

# Disabled experiments on the Straight/Turning features:
# selectedDf$PP_Dev_1_Turning <- NULL
# selectedDf$Std_PP_2_Straight <- NULL
# selectedDf$Std_PP_2_Turning <- NULL
# selectedDf$Std_PP_3_Straight <- NULL
# selectedDf$Std_PP_3_Turning <- NULL
#
# # According to Linear model
# selectedDf$PP_Dev_2_Straight <- abs(selectedDf$PP_Dev_2_Straight)
# selectedDf$PP_Dev_3_Straight <- abs(selectedDf$PP_Dev_3_Straight)
# selectedDf$PP_Dev_2_Turning <- abs(selectedDf$PP_Dev_2_Turning)
# selectedDf$PP_Dev_3_Turning <- abs(selectedDf$PP_Dev_3_Turning)
# selectedDf$PP_Prior <- abs(selectedDf$PP_Prior) # NULL

# Logical target: TRUE when PP_After reaches the threshold. The raw value is
# removed afterwards so it cannot be used as a predictor.
selectedDf$Class <- selectedDf$PP_After >= thresholdPPAfter
selectedDf$PP_After <- NULL

print(names(selectedDf))
```

```{r}
# library(mefa)
# combinedDf <- rep(combinedDf, 10) 
```

```{r}
set.seed(43)
n_folds <- 3

# Booster settings shared by the cross-validated run below.
param <- list(
  objective = "binary:logistic",
  booster = "gbtree",
  eval_metric = "auc",
  eta = 0.1,
  max_depth = 10,
  alpha = 1,
  lambda = 0,
  gamma = 0.45,
  min_child_weight = 0.3,
  subsample = 0.5,
  colsample_bytree = 1
)
params <- param # both names are defined, as before

# XGBoost Model
# Stratified n_folds-fold cross-validation on every column except Class;
# prediction = TRUE keeps the per-row predictions in xgb_m$pred.
xgb_m <- xgb.cv(
  params = param,
  data = as.matrix(selectedDf %>% select(-Class)),
  label = selectedDf$Class,
  nrounds = 100,
  verbose = FALSE,
  prediction = TRUE,
  # Original author's note: setting this to FALSE helps the run go through
  # more iterations.
  maximize = FALSE,
  nfold = n_folds,
  metrics = c("auc", "error"),
  early_stopping_rounds = 50,
  stratified = TRUE,
  scale_pos_weight = 1
)

# Evaluation-log row of the selected iteration.
# xgb_m$evaluation_log[xgb_m$best_iteration,"test_auc_mean"]
xgb_m$evaluation_log[xgb_m$best_iteration, ]

```

## Performance Metrics
```{r}
# Prediction: round the cross-validation predictions to hard 0/1 labels.
selectedDf[["clsPred"]] <- round(xgb_m[["pred"]])

# Compute binary classification metrics from a two-column table.
#
# sdat: data frame (or matrix) whose FIRST column holds the predicted class
#       and whose SECOND column holds the actual class. The formulas below
#       read specificity/sensitivity from the column totals and precision/NPV
#       from the row totals, so they are only correct in that column order.
#       Rows containing NA are dropped. The smaller class value (0 / FALSE)
#       is treated as the negative class.
#
# Returns an unnamed numeric vector:
#   c(accuracy, specificity, sensitivity, precision, NPV)
# A metric whose denominator is zero comes back as NaN.
computePerformanceResults <- function(sdat) {
  sdat <- as.data.frame(sdat)
  sdat <- sdat[complete.cases(sdat), , drop = FALSE]

  # Bring both columns to comparable values: logical -> 0/1, factor -> label.
  as_label <- function(v) {
    if (is.logical(v)) {
      as.numeric(v)
    } else if (is.factor(v)) {
      as.character(v)
    } else {
      v
    }
  }
  predicted <- as_label(sdat[[1]])
  actual <- as_label(sdat[[2]])

  # Use one shared level set so the confusion matrix stays square even when a
  # class never occurs in one of the columns (e.g. nothing predicted positive).
  class_levels <- sort(unique(c(predicted, actual)))
  if (all(class_levels %in% c(0, 1))) {
    class_levels <- c(0, 1)
  }

  acc <- sum(predicted == actual) / nrow(sdat)

  # Rows = predicted class, columns = actual class.
  conf_mat <- table(
    factor(predicted, levels = class_levels),
    factor(actual, levels = class_levels)
  )
  specif <- conf_mat[1, 1] / sum(conf_mat[, 1])
  sensiv <- conf_mat[2, 2] / sum(conf_mat[, 2])
  preci <- conf_mat[2, 2] / sum(conf_mat[2, ])
  npv <- conf_mat[1, 1] / sum(conf_mat[1, ])

  c(acc, specif, sensiv, preci, npv)
}

# Get average performance.
# computePerformanceResults() cross-tabulates its first column (rows) against
# its second column (columns) and takes specificity/sensitivity from column
# totals and precision/NPV from row totals. The predictions therefore have to
# be passed first and the true class second; in the opposite order precision
# and recall (and specificity and NPV) come out swapped.
performance <- computePerformanceResults(selectedDf %>% select(clsPred, Class))

# Result vector layout: accuracy, specificity, sensitivity, precision, NPV.
acc <- performance[1]
prec <- performance[4]
recall <- performance[3]
spec <- performance[2]
npv <- performance[5]
f1 <- (2 * recall * prec) / (recall + prec)
# AUC is read from the cross-validation log at the selected iteration.
auc <- as.numeric(xgb_m$evaluation_log[xgb_m$best_iteration, "test_auc_mean"])

print(paste("Accuracy=", round(acc, 2)))
print(paste("Precision=", round(prec, 2)))
print(paste("Recall=", round(recall, 2)))
print(paste("Specificity=", round(spec, 2)))
print(paste("NPV=", round(npv, 2)))
print(paste("F1=", round(f1, 2)))
print(paste("AUC=", round(auc, 2)))
```

```{r}
# Importance
# Fit a single booster on all rows (every column except the target Class and
# the stored prediction clsPred) and rank the features with xgb.importance().
importanceFeatures <- as.matrix(selectedDf %>% select(-c(Class, clsPred)))

# Only arguments that apply to a single fit are passed. The cross-validation
# options copied from the xgb.cv() call (prediction, nfold, metrics,
# stratified) are not arguments of xgboost(); they were forwarded as booster
# parameters and have been removed.
# NOTE(review): `param` sets eval_metric = "auc", where larger is better, yet
# early stopping runs with maximize = FALSE. Kept as in the original -- confirm
# this is intended.
bst <- xgboost(params = param,
               data = importanceFeatures,
               label = selectedDf$Class,
               nrounds = 100,
               verbose = FALSE,
               maximize = FALSE,
               early_stopping_rounds = 50,
               scale_pos_weight = 1)

importanceDf <- xgb.importance(colnames(importanceFeatures), model = bst)
print(importanceDf)
```

```{r}
library(pROC)

# ROC of the cross-validation predictions against the true class.
# The predictor is the raw score in xgb_m$pred, not its rounded 0/1 version:
# a predictor with only two distinct values yields a single operating point
# instead of a curve.
dfROC <- pROC::roc(response = as.numeric(selectedDf$Class),
                   predictor = xgb_m$pred,
                   levels = c(0, 1), direction = "<")

# it = which.max(xgb_m$evaluation_log$test_auc_mean)
# best.iter = xgb_m$evaluation_log$iter[it]
# best.iter

# Plot the object computed above rather than rebuilding the same ROC.
plot(dfROC,
     legacy.axes = TRUE,
     main = "ROC Curve",
     lwd = 1.5)
```


### Plot feature importance
```{r}
# Axis settings for the importance chart: y fixed to [0, 1], x untitled.
yAxis <- list(
  title = 'Importance',
  range = c(0.0, 1.0)
)
xAxis <- list(
  title = ''
)

# Order the bars by decreasing Gain by fixing the factor levels of Feature.
importanceDf$Feature <- factor(importanceDf$Feature, levels = importanceDf[order(-Gain),]$Feature)

# Grouped bars showing Gain, Cover and Frequency for every feature.
fig_Importance <- plot_ly(importanceDf, x = ~Feature, y = ~Gain, type = 'bar', name = 'Gain', width = 600) %>%
  add_trace(y = ~Cover, name = 'Cover') %>%
  add_trace(y = ~Frequency, name = 'Frequency') %>%
  layout(yaxis = yAxis, xaxis = xAxis, barmode = 'group', title = "Feature Importance") %>%
  # The pipe already supplies the plot as the first argument; passing
  # .Last.value as well added a stray unnamed entry to the config options.
  config(mathjax = 'cdn')

htmltools::tagList(fig_Importance)
```


